/**
 * Package:com.afteryuan.util;
 * $Id$
 * Copyright(c) 2001-2009 www.afteryuan.com
 */
package com.afteryuan.util;

import java.util.regex.Pattern;

/**
 * HtmToTxt: utility for converting HTML markup into plain text.
 * <p/>
 * <p><a href="HtmToTxt.java.html"><i>View Source</i></a></p>
 *
 * @author <a href="mailto:afteryuan@gmail.com">$Author$</a>
 * @version $Revision$
 */
public class HtmToTxt {
    /** Matches a whole script element (opening tag, body, closing tag), case-insensitively. */
    private static final Pattern SCRIPT_ELEMENT_PATTERN = Pattern.compile(
            "<[\\s]*?script[^>]*?>[\\s\\S]*?<[\\s]*?\\/[\\s]*?script[\\s]*?>",
            Pattern.CASE_INSENSITIVE);

    /** Matches a whole style element (opening tag, body, closing tag), case-insensitively. */
    private static final Pattern STYLE_ELEMENT_PATTERN = Pattern.compile(
            "<[\\s]*?style[^>]*?>[\\s\\S]*?<[\\s]*?\\/[\\s]*?style[\\s]*?>",
            Pattern.CASE_INSENSITIVE);

    /** Matches any remaining tag: a '<', one or more non-'>' characters, then '>'. */
    private static final Pattern ANY_TAG_PATTERN =
            Pattern.compile("<[^>]+>", Pattern.CASE_INSENSITIVE);

    /**
     * Converts an HTML fragment or document to plain text.
     * <p>
     * Script and style elements are removed together with their contents, every
     * remaining tag is stripped, and a small fixed set of character entities is
     * decoded. Whitespace and line breaks in the input are left untouched.
     *
     * @param inputString the HTML text to convert; may be {@code null}
     * @return the text with markup removed and the supported entities decoded,
     *         or an empty string when {@code inputString} is {@code null}
     */
    public static String html2Text(String inputString) {
        if (inputString == null) {
            // Previously a null input surfaced as a caught NullPointerException and
            // produced an empty result; keep that result without the stderr noise.
            return "";
        }

        // Order matters: script/style bodies must go before the generic tag pass,
        // otherwise their contents would survive as text.
        String text = SCRIPT_ELEMENT_PATTERN.matcher(inputString).replaceAll("");
        text = STYLE_ELEMENT_PATTERN.matcher(text).replaceAll("");
        text = ANY_TAG_PATTERN.matcher(text).replaceAll("");

        // Literal replace() is used instead of replaceAll(): the targets are plain
        // strings, and a regex replacement string treats '\' and '$' specially.
        return text
                .replace("&nbsp;", " ")
                .replace("&ensp;", " ")
                .replace("&emsp;", " ")
                .replace("&lt;", "<")
                .replace("&gt;", ">")
                .replace("&quot;", "\"")
                .replace("&copy;", "\u00A9")
                .replace("&reg;", "\u00AE")
                .replace("&times;", "*")
                // ASCII stand-in for the division sign, mirroring '*' for &times;.
                .replace("&divide;", "/")
                // Decode &amp; last so that escaped entities such as "&amp;quot;"
                // become "&quot;" rather than being decoded a second time.
                .replace("&amp;", "&");
    }

    public static void main(String[] args) {
        String h = "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n" +
                "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"zh-CN\" dir=\"ltr\">\n" +
                "  <head>\n" +
                "    <meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"/>\n" +
                "    <title>java过滤html标签函数 - liubaoshan - JavaEye技术网站</title>\n" +
                "    <meta name=\"description\" content=\"public static String html2Text(String inputString) {             String htmlStr = inputString; //含html标签的字符串                 String textStr =&quot;&quot;;           java.util.regex.Pattern p_script;    ...\" />\n" +
                "    <meta name=\"keywords\" content=\"  java过滤html标签函数\" />\n" +
                "    <link rel=\"shortcut icon\" href=\"/images/favicon.ico\" type=\"image/x-icon\" />\n" +
                "    <link href=\"/rss\" rel=\"alternate\" title=\"liubaoshan\" type=\"application/rss+xml\" />\n" +
                "    <link href=\"http://www.javaeye.com/stylesheets/blog.css?1256178526\" media=\"screen\" rel=\"stylesheet\" type=\"text/css\" />\n" +
                "<link href=\"http://www.javaeye.com/stylesheets/themes/blog/blue.css?1235723792\" media=\"screen\" rel=\"stylesheet\" type=\"text/css\" />\n" +
                "    <script src=\"http://www.javaeye.com/javascripts/application.js?1248170042\" type=\"text/javascript\"></script>\n" +
                "      <link href=\"http://www.javaeye.com/javascripts/syntaxhighlighter/SyntaxHighlighter.css?1201588027\" media=\"screen\" rel=\"stylesheet\" type=\"text/css\" />\n" +
                "  <script src=\"http://www.javaeye.com/javascripts/syntaxhighlighter/shCoreCommon.js?1238329845\" type=\"text/javascript\"></script>\n" +
                "<script src=\"http://www.javaeye.com/javascripts/hotkey.js?1238403289\" type=\"text/javascript\"></script>\n" +
                "  <style>\n" +
                "    div#main {background-color: transparent; border: 0; padding: 0px; width: 740px;}\n" +
                "  </style>\n" +
                "  </head>\n" +
                "  <body>\n" +
                "    <div id=\"header\">\n" +
                "      <div id=\"blog_site_nav\">\n" +
                "  <a href=\"http://www.javaeye.com/\" class=\"homepage\">首页</a>\n" +
                "  <a href=\"http://www.javaeye.com/news\">新闻</a>\n" +
                "  <a href=\"http://www.javaeye.com/forums\">论坛</a>\n" +
                "  <a href=\"http://www.javaeye.com/blogs\">博客</a>\n" +
                "  <a href=\"http://www.javaeye.com/job\">招聘</a>\n" +
                "  <a href=\"#\" onclick=\"return false;\" id=\"msna\"><u>更多</u> <small>▼</small></a>\n" +
                "  <div class=\"quick_menu\" style=\"display:none;\">\n" +
                "    <a href=\"http://www.javaeye.com/ask\">问答</a>\n" +
                "    <a href=\"http://www.javaeye.com/wiki\">知识库</a>\n" +
                "    <a href=\"http://www.javaeye.com/groups\">圈子</a>\n" +
                "    <a href=\"http://www.javaeye.com/google_search\">搜索</a>\n" +
                "  </div>\n" +
                "</div>\n" +
                "      <div id=\"user_nav\">\n" +
                "      <a href=\"http://afteryuan.javaeye.com\" title=\"查看我的博客首页\" class=\"welcome\">欢迎afteryuan</a>\n" +
                "    \n" +
                "      <a href=\"http://app.javaeye.com/messages\" title=\"发送站内短信\">收件箱</a>\n" +
                "    \n" +
                "    \n" +
                "      <a href=\"http://app.javaeye.com\" title=\"我的应用首页\">我的应用</a>\n" +
                "    \n" +
                "    <div class=\"quick_menu\" style=\"display:none;\">\n" +
                "      <a href=\"http://app.javaeye.com/feed\" title=\"我订阅的好友动态消息\">我的订阅</a>\n" +
                "      <a href=\"http://app.javaeye.com/feed/my\" title=\"我参与的话题更新的消息\">我的参与</a>\n" +
                "      <a href=\"http://app.javaeye.com/chat\" title=\"用闲聊发表简短的话题\">我的闲聊</a>\n" +
                "      <a href=\"http://app.javaeye.com/mygroup\" title=\"我加入的圈子最新话题\">我的圈子</a>\n" +
                "      <a href=\"http://app.javaeye.com/links\" title=\"我收藏的网络资源链接\">我的收藏</a>\n" +
                "      <a href=\"http://app.javaeye.com/myresume\" title=\"我的个人简历\">我的简历</a>\n" +
                "    </div>\n" +
                "        <a href=\"http://afteryuan.javaeye.com/admin\" title=\"管理我的博客\">我的博客</a>\n" +
                "    <a href=\"http://app.javaeye.com/profile\" title=\"修改我的个人设置\">设置</a>\n" +
                "    <a href=\"/logout\" class=\"nobg\" onclick=\"var f = document.createElement('form'); f.style.display = 'none'; this.parentNode.appendChild(f); f.method = 'POST'; f.action = this.href;var m = document.createElement('input'); m.setAttribute('type', 'hidden'); m.setAttribute('name', '_method'); m.setAttribute('value', 'put'); f.appendChild(m);f.submit();return false;\">退出</a>\n" +
                "  </div>\n" +
                "    </div>\n" +
                "\n" +
                "    <div id=\"page\">\n" +
                "      <div id=\"branding\" class=\"clearfix\">\n" +
                "        <div id=\"blog_name\"><h1><a href=\"/\">liubaoshan</a></h1></div>\n" +
                "        <div id=\"blog_preview\"></div>\n" +
                "        <div id=\"blog_domain\">永久域名 <a href=\"/\">http://lbs.javaeye.com</a></div>\n" +
                "      </div>\n" +
                "      \n" +
                "      <div>\n" +
                "      <script type=\"text/javascript\">\n" +
                "      alimama_pid=\"mm_11218979_1572182_4470671\"; \n" +
                "      alimama_titlecolor=\"0000FF\"; \n" +
                "      alimama_descolor =\"000000\"; \n" +
                "      alimama_bgcolor=\"FFFFFF\"; \n" +
                "      alimama_bordercolor=\"E6E6E6\"; \n" +
                "      alimama_linkcolor=\"008000\"; \n" +
                "      alimama_bottomcolor=\"FFFFFF\"; \n" +
                "      alimama_anglesize=\"0\"; \n" +
                "      alimama_bgpic=\"0\"; \n" +
                "      alimama_icon=\"0\"; \n" +
                "      alimama_sizecode=\"11\"; \n" +
                "      alimama_width=760; \n" +
                "      alimama_height=60; \n" +
                "      alimama_type=2; \n" +
                "      </script>\n" +
                "      <script src=\"http://a.alimama.cn/inf.js\" type=text/javascript>\n" +
                "      </script>  \n" +
                "    </div>\n" +
                "      <div id=\"content\" class=\"clearfix\">\n" +
                "        <div id=\"main\">\n" +
                "          \n" +
                "\n" +
                "\n" +
                "          \n" +
                "\n" +
                "\n" +
                "<div class=\"blog_main\"> \n" +
                "  <div id=\"blog_nav\">\n" +
                "    \n" +
                "    <div id=\"pre_next\">\n" +
                "      <a href=\"/blog/289462\" class=\"next\" title=\"apathe和tomcat集成的总结\">apathe和tomcat集成的总结</a>\n" +
                "      |\n" +
                "      <a href=\"/blog/208054\" class=\"pre\" title=\"webwork的result跳转问题\">webwork的result跳转问题</a>\n" +
                "    </div>\n" +
                "  </div>\n" +
                "  <div class=\"blog_title\">\n" +
                "    <div class=\"date\"><span class='year'>2008</span><span class='sep_year'>-</span><span class='month'>06</span><span class='sep_month'>-</span><span class='day'>26</span></div>\n" +
                "    <h3 class='' title=''><a href=\"/blog/208056\">java过滤html标签函数</a></h3>\n" +
                "                \n" +
                "  </div>\n" +
                "\n" +
                "  <div class=\"blog_content\">\n" +
                "    public static String html2Text(String inputString) {\n" +
                "<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; String htmlStr = inputString; //含html标签的字符串\n" +
                "<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; String textStr =\"\";\n" +
                "<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; java.util.regex.Pattern p_script;\n" +
                "<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; java.util.regex.Matcher m_script;\n" +
                "<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; java.util.regex.Pattern p_style;\n" +
                "<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; java.util.regex.Matcher m_style;\n" +
                "<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; java.util.regex.Pattern p_html;\n" +
                "<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; java.util.regex.Matcher m_html;\n" +
                "<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; \n" +
                "<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; try {\n" +
                "<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; String regEx_script = \"&lt;[\\\\s]*?script[^&gt;]*?&gt;[\\\\s\\\\S]*?&lt;[\\\\s]*?\\\\/[\\\\s]*?script[\\\\s]*?&gt;\"; //定义script的正则表达式{或&lt;script[^&gt;]*?&gt;[\\\\s\\\\S]*?&lt;\\\\/script&gt; }\n" +
                "<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; String regEx_style = \"&lt;[\\\\s]*?style[^&gt;]*?&gt;[\\\\s\\\\S]*?&lt;[\\\\s]*?\\\\/[\\\\s]*?style[\\\\s]*?&gt;\"; //定义style的正则表达式{或&lt;style[^&gt;]*?&gt;[\\\\s\\\\S]*?&lt;\\\\/style&gt; }\n" +
                "<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; String regEx_html = \"&lt;[^&gt;]+&gt;\"; //定义HTML标签的正则表达式\n" +
                "<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; \n" +
                "<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; p_script = Pattern.compile(regEx_script,Pattern.CASE_INSENSITIVE);\n" +
                "<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; m_script = p_script.matcher(htmlStr);\n" +
                "<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; htmlStr = m_script.replaceAll(\"\"); //过滤script标签\n" +
                "<br />\n" +
                "<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; p_style = Pattern.compile(regEx_style,Pattern.CASE_INSENSITIVE);\n" +
                "<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; m_style = p_style.matcher(htmlStr);\n" +
                "<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; htmlStr = m_style.replaceAll(\"\"); //过滤style标签\n" +
                "<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; \n" +
                "<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; p_html = Pattern.compile(regEx_html,Pattern.CASE_INSENSITIVE);\n" +
                "<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; m_html = p_html.matcher(htmlStr);\n" +
                "<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; htmlStr = m_html.replaceAll(\"\"); //过滤html标签\n" +
                "<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; \n" +
                "<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; textStr = htmlStr;\n" +
                "<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; \n" +
                "<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; }catch(Exception e) {\n" +
                "<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; System.err.println(\"html2Text: \" + e.getMessage());\n" +
                "<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; }\n" +
                "<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; \n" +
                "<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; return textStr;//返回文本字符串\n" +
                "<br />&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; }&nbsp;&nbsp; \n" +
                "<br />\n" +
                "  </div>\n" +
                "\n" +
                "  \n" +
                "\n" +
                "  <script type=\"text/javascript\"><!--\n" +
                "google_ad_client = \"pub-4348265167276910\";\n" +
                "/* 468x60, 个人博客 */\n" +
                "google_ad_slot = \"2046406163\";\n" +
                "google_ad_width = 468;\n" +
                "google_ad_height = 60;\n" +
                "//-->\n" +
                "</script>\n" +
                "<script type=\"text/javascript\"\n" +
                "src=\"http://pagead2.googlesyndication.com/pagead/show_ads.js\">\n" +
                "</script>\n" +
                "\n" +
                "  \n" +
                "  \n" +
                "\n" +
                "  <div class=\"blog_bottom\">\n" +
                "    <ul>\n" +
                "      <li>00:18</li>\n" +
                "      <li>浏览 (1410)</li>\n" +
                "            <li><a href=\"#comments\">评论</a> (0)</li>\n" +
                "      <li>分类: <a href=\"/category/34817\">正则表达式</a></li>\n" +
                "      \n" +
                "      \n" +
                "        <li><a href='http://app.javaeye.com/links?user_favorite%5Btitle%5D=java%E8%BF%87%E6%BB%A4html%E6%A0%87%E7%AD%BE%E5%87%BD%E6%95%B0&amp;user_favorite%5Burl%5D=http%3A%2F%2Flbs.javaeye.com%2Fblog%2F208056' target='_blank' class='favorite'>收藏</a></li>\n" +
                "        \n" +
                "      \n" +
                "      \n" +
                "      <li class='last'><a href=\"http://www.javaeye.com/wiki/topic/208056\" target=\"_blank\" class=\"more\">相关推荐</a></li>\n" +
                "    </ul>\n" +
                "  </div>\n" +
                "\n" +
                "  <div class=\"blog_comment\">\n" +
                "    <h5>评论</h5>\n" +
                "    <a id=\"comments\" name=\"comments\"></a>\n" +
                "    \n" +
                "    \n" +
                "    \n" +
                "  </div>\n" +
                "\n" +
                "  <div class=\"blog_comment\">\n" +
                "    <h5>发表评论</h5>\n" +
                "          \n" +
                "      \n" +
                "        <a href=\"/index/forum_certify\" target=\"_blank\">完成论坛规则小测验以后，您才能在论坛发帖和回复。</a>\n" +
                "      \n" +
                "      </div>\n" +
                "</div>\n" +
                "\n" +
                "\n" +
                "<script type=\"text/javascript\">\n" +
                "  dp.SyntaxHighlighter.HighlightAll('code', true, true);\n" +
                "\n" +
                "  fix_image_size($$('div.blog_content img'), 700);\n" +
                "  function quote_comment(id) {\n" +
                "    new Ajax.Request('/editor/quote', {\n" +
                "      parameters: {'id':id, 'type':'Post'},\n" +
                "      onSuccess:function(response){editor.bbcode_editor.textarea.insertAfterSelection(response.responseText);}\n" +
                "    });\n" +
                "  }\n" +
                "</script>\n" +
                "\n" +
                "\n" +
                "<div id=\"search_engine_box\" style=\"display:none;\">\n" +
                "<h2>您正在搜索 java html 标签 过滤 <a href=\"#\" onclick=\"$('search_engine_box').hide();return false;\">[ X ]</a></h2>\n" +
                "<div>\n" +
                "关键字已在页面上高亮显示<br/>\n" +
                "您可能对下列文章也感兴趣<ul><li><a href='http://www.javaeye.com/topic/193350' title='HTML Head 参数详解' target='_blank'>HTML Head 参数详解</a></li><li><a href='http://www.javaeye.com/topic/210632' title='Strust2 标签 &lt;s:select/&gt;的疑问' target='_blank'>Strust2 标签 &lt;s:select/&gt;的疑问</a></li><li><a href='http://www.javaeye.com/topic/296706' title='扩展displaytag标签库 标记' target='_blank'>扩展displaytag标签库 标记</a></li><li><a href='http://www.javaeye.com/topic/40188' title='项目中用到的一个小工具类(字符过滤器)' target='_blank'>项目中用到的一个小工具类(字符 ...</a></li><li><a href='http://www.javaeye.com/topic/154250' title='JAVA中使用Htmlparse解析HTML文档' target='_blank'>JAVA中使用Htmlparse解析HTML文 ...</a></li></ul>\n" +
                "<a href='#' onclick=\"var f = document.createElement('form'); f.target = '_blank'; f.style.display = 'none'; this.parentNode.appendChild(f); f.method = 'POST'; f.action = 'http://www.javaeye.com/search?type=all';var m = document.createElement('input'); m.setAttribute('type', 'hidden'); m.setAttribute('name', 'query'); m.setAttribute('value', 'java html 标签 过滤'); f.appendChild(m);f.submit();return false;\">站内搜索更多 java html 标签 过滤</a>\n" +
                "</div>\n" +
                "</div>\n" +
                "<script src=\"http://www.javaeye.com/javascripts/se_hilite.js?1224057847\" type=\"text/javascript\"></script>\n" +
                "<script type=\"text/javascript\">\n" +
                "document.observe('dom:loaded',function() {\n" +
                "  Hilite.hiliteElement($(\"main\"), \"java html 标签 过滤\");\n" +
                "      document.body.appendChild($('search_engine_box').remove());\n" +
                "      $('search_engine_box').appear();\n" +
                "});\n" +
                "</script>\n" +
                "      \n" +
                "        </div>\n" +
                "\n" +
                "        <div id=\"local\">\n" +
                "          <div class=\"local_top\"></div>\n" +
                "          <div id=\"blog_owner\">\n" +
                "            <div id=\"blog_owner_logo\"><a href='http://lbs.javaeye.com'><img alt=\"liubaoshan的博客\" class=\"logo\" src=\"http://www.javaeye.com/upload/logo/user/28359/86dae072-e250-36fd-96ea-06d284335e2f.jpg?1236833034\" title=\"liubaoshan的博客: liubaoshan\" /></a></div>\n" +
                "            <div id=\"blog_owner_name\">liubaoshan</div>\n" +
                "          </div>\n" +
                "          \n" +
                "            <div id=\"twitter_decorate\">&nbsp;</div>\n" +
                "            <div id=\"twitter\" class=\"twitter clearfix\">\n" +
                "              \n" +
                "我要奋斗！奋斗！奋斗！\n" +
                "<br/><em><a href=\"http://lbs.javaeye.com/blog/chat/35642\">2009-07-17</a>\n" +
                " 通过网页\n" +
                " \n" +
                "</em>\n" +
                "       <a href=\"http://app.javaeye.com/chat?twitter%5Bbody%5D=%40liubaoshan+&amp;twitter%5Breply_to_id%5D=35642\" title=\"回复这个闲聊\"><img alt=\"Reply_twitter\" src=\"http://www.javaeye.com/images/app/reply_twitter.gif?1232617931\" /></a>\n" +
                "              <div class=\"right\"><a href=\"/blog/chat\">&gt;&gt;更多闲聊</a></div>\n" +
                "            </div>\n" +
                "          \n" +
                "          <div id=\"blog_actions\">\n" +
                "            <ul>\n" +
                "              <li>浏览: 3710 次</li>\n" +
                "              <li>性别: <img alt=\"Icon_minigender_1\" src=\"http://www.javaeye.com/images/icon_minigender_1.gif?1192779195\" title=\"男\" /></li>\n" +
                "              <li>来自: 深圳</li>\n" +
                "              <li><img src='/images/status/offline.gif'/></li>\n" +
                "              <li>\n" +
                "                <a href=\"/blog/profile\" class=\"profile\">详细资料</a>\n" +
                "                <a href=\"/blog/guest_book\" class=\"guest_book\">留言簿</a>\n" +
                "              </li>\n" +
                "              \n" +
                "                <li>\n" +
                "                  <a href=\"http://app.javaeye.com/messages/new?message%5Breceiver_name%5D=liubaoshan\" class=\"message\" title=\"发送站内短信\">发短消息</a>\n" +
                "                  \n" +
                "                    <a href=\"http://app.javaeye.com/feed?subscription%5Bsubscribed_user_name%5D=liubaoshan\" class=\"subscription\" onclick=\"var f = document.createElement('form'); f.style.display = 'none'; this.parentNode.appendChild(f); f.method = 'POST'; f.action = this.href;f.submit();return false;\">订阅</a>\n" +
                "                  \n" +
                "                </li>\n" +
                "              \n" +
                "            </ul>\n" +
                "            <h5>搜索本博客</h5>\n" +
                "            <form action=\"/blog/search\" method=\"get\">              <input class=\"text\" id=\"query\" name=\"query\" style=\"margin-left: 10px;width: 110px;\" type=\"text\" />\n" +
                "              <input class=\"submit\" type=\"submit\" value=\"搜索\" />\n" +
                "            </form>          </div>\n" +
                "          \n" +
                "            <div id=\"user_visits\" class=\"clearfix\">\n" +
                "              <h5>最近访客 <span style='font-weight:normal;font-size:12px;padding-left:30px;'><a href=\"/blog/user_visits\">&gt;&gt;更多访客</a></span></h5>\n" +
                "              \n" +
                "                <div class=\"user_visit\">\n" +
                "                  <div class=\"logo\"><a href='http://afteryuan.javaeye.com' target='_blank'><img alt=\"afteryuan的博客\" class=\"logo\" src=\"http://www.javaeye.com/images/user-logo-thumb.gif?1194185304\" title=\"afteryuan的博客: afteryuan\" /></a></div>\n" +
                "                  <div class=\"left\"><a href='http://afteryuan.javaeye.com' target='_blank'>afteryuan</a></div>\n" +
                "                </div>\n" +
                "              \n" +
                "                <div class=\"user_visit\">\n" +
                "                  <div class=\"logo\"><a href='http://mxdba321123.javaeye.com' target='_blank'><img alt=\"mxdba321123的博客\" class=\"logo\" src=\"http://www.javaeye.com/images/user-logo-thumb.gif?1194185304\" title=\"mxdba321123的博客: \" /></a></div>\n" +
                "                  <div class=\"left\"><a href='http://mxdba321123.javaeye.com' target='_blank'>mxdba321123</a></div>\n" +
                "                </div>\n" +
                "              \n" +
                "                <div class=\"user_visit\">\n" +
                "                  <div class=\"logo\"><a href='http://yangkun0318-126-com.javaeye.com' target='_blank'><img alt=\"苍山洱海的博客\" class=\"logo\" src=\"http://www.javaeye.com/images/user-logo-thumb.gif?1194185304\" title=\"苍山洱海的博客: \" /></a></div>\n" +
                "                  <div class=\"left\"><a href='http://yangkun0318-126-com.javaeye.com' target='_blank'>苍山洱海</a></div>\n" +
                "                </div>\n" +
                "              \n" +
                "                <div class=\"user_visit\">\n" +
                "                  <div class=\"logo\"><a href='http://geniuschess.javaeye.com' target='_blank'><img alt=\"geniuschess的博客\" class=\"logo\" src=\"http://www.javaeye.com/images/user-logo-thumb.gif?1194185304\" title=\"geniuschess的博客: geniuschess\" /></a></div>\n" +
                "                  <div class=\"left\"><a href='http://geniuschess.javaeye.com' target='_blank'>geniuschess</a></div>\n" +
                "                </div>\n" +
                "              \n" +
                "            </div>\n" +
                "          \n" +
                "                      <div id=\"blog_menu\">\n" +
                "              <h5>博客分类</h5>\n" +
                "              <ul>\n" +
                "                <li><a href=\"/\">全部博客 (14)</a></li>\n" +
                "                \n" +
                "                  <li><a href=\"/category/34176\">spring相关 (0)</a></li>\n" +
                "                \n" +
                "                  <li><a href=\"/category/34177\">hibernate相关 (0)</a></li>\n" +
                "                \n" +
                "                  <li><a href=\"/category/34178\">ajax相关 (0)</a></li>\n" +
                "                \n" +
                "                  <li><a href=\"/category/34179\">webwork相关 (1)</a></li>\n" +
                "                \n" +
                "                  <li><a href=\"/category/34180\">java相关 (2)</a></li>\n" +
                "                \n" +
                "                  <li><a href=\"/category/34183\">javascript相关 (1)</a></li>\n" +
                "                \n" +
                "                  <li><a href=\"/category/34185\">每天一贴 (1)</a></li>\n" +
                "                \n" +
                "                  <li><a href=\"/category/34817\">正则表达式 (2)</a></li>\n" +
                "                \n" +
                "                  <li><a href=\"/category/34822\">DWR相关 (0)</a></li>\n" +
                "                \n" +
                "                  <li><a href=\"/category/34824\">css (1)</a></li>\n" +
                "                \n" +
                "                  <li><a href=\"/category/49872\">其它 (3)</a></li>\n" +
                "                \n" +
                "                  <li><a href=\"/category/49874\">eclipse (1)</a></li>\n" +
                "                \n" +
                "                  <li><a href=\"/category/49875\">Freemarker (2)</a></li>\n" +
                "                \n" +
                "              </ul>\n" +
                "            </div>\n" +
                "            \n" +
                "            \n" +
                "            <div id=\"blog_others\">\n" +
                "              <h5>其他分类</h5>\n" +
                "              <ul>\n" +
                "                <li><a href=\"/blog/favorite\">我的收藏</a> (55)</li>\n" +
                "                                                <li><a href=\"/blog/forum\">我的论坛帖子</a> (13)</li>\n" +
                "                <li><a href=\"/blog/article\">我的精华良好贴</a> (0)</li>\n" +
                "                              </ul>\n" +
                "            </div>\n" +
                "            \n" +
                "            <div id=\"recent_groups\">\n" +
                "              <h5>最近加入圈子</h5>\n" +
                "              <ul>\n" +
                "                \n" +
                "                  <li><a href=\"http://jbpm.group.javaeye.com\">JBPM @net</a></li>\n" +
                "                \n" +
                "                  <li><a href=\"http://EXT.group.javaeye.com\">EXT</a></li>\n" +
                "                \n" +
                "                  <li><a href=\"http://analysis.group.javaeye.com\">中文分词</a></li>\n" +
                "                \n" +
                "                  <li><a href=\"http://lucene-group.group.javaeye.com\">lucene爱好者</a></li>\n" +
                "                \n" +
                "                  <li><a href=\"http://ecside.group.javaeye.com\">GT-Grid</a></li>\n" +
                "                \n" +
                "              </ul>\n" +
                "            </div>\n" +
                "            \n" +
                "            \n" +
                "                          <div id=\"month_blogs\">\n" +
                "                <h5>存档</h5>\n" +
                "                <ul>\n" +
                "                  \n" +
                "                    <li><a href=\"/blog/monthblog/2009-01\">2009-01</a> (1)</li>\n" +
                "                  \n" +
                "                    <li><a href=\"/blog/monthblog/2008-12\">2008-12</a> (8)</li>\n" +
                "                  \n" +
                "                    <li><a href=\"/blog/monthblog/2008-06\">2008-06</a> (5)</li>\n" +
                "                  \n" +
                "                  <li><a href=\"/blog/monthblog_more\">更多存档...</a></li>\n" +
                "                </ul>\n" +
                "              </div>\n" +
                "                        <div id=\"recent_comments\">\n" +
                "              <h5>最新评论</h5>\n" +
                "              <ul>\n" +
                "                \n" +
                "                  <li>\n" +
                "                    <a href=\"/blog/289479#comments\" title=\"Java如何操作Excel文件\">Java如何操作Excel文件</a><br/>\n" +
                "                    所有jar包下载地址已更新到 http://jar.cn13e.com\n" +
                "http: ...<br/>\n" +
                "                    -- by <a href='http://xxxxxggg.javaeye.com' target='_blank'>xxxxxggg</a>\n" +
                "                  </li>\n" +
                "                \n" +
                "                  <li>\n" +
                "                    <a href=\"/blog/289507#comments\" title=\"FreeMarker实现递归的方法\">FreeMarker实现递归的方法</a><br/>\n" +
                "                    bromon 写道使用宏来做递归更好：\n" +
                "&lt;#macro buildNode  ...<br/>\n" +
                "                    -- by <a href='http://ember319.javaeye.com' target='_blank'>ember_319</a>\n" +
                "                  </li>\n" +
                "                \n" +
                "                  <li>\n" +
                "                    <a href=\"/blog/289507#comments\" title=\"FreeMarker实现递归的方法\">FreeMarker实现递归的方法</a><br/>\n" +
                "                    使用宏来做递归更好：\n" +
                "&lt;#macro buildNode child par ...<br/>\n" +
                "                    -- by <a href='http://bromon.javaeye.com' target='_blank'>bromon</a>\n" +
                "                  </li>\n" +
                "                \n" +
                "              </ul>\n" +
                "            </div>\n" +
                "            <div id=\"comments_top\">\n" +
                "              <h5>评论排行榜</h5>\n" +
                "              <ul>\n" +
                "                \n" +
                "                  <li><a href=\"/blog/289507\" title=\"FreeMarker实现递归的方法\">FreeMarker实现递归的方法</a></li>\n" +
                "                \n" +
                "                  <li><a href=\"/blog/289479\" title=\"Java如何操作Excel文件\">Java如何操作Excel文件</a></li>\n" +
                "                \n" +
                "                  <li><a href=\"/blog/310083\" title=\"人生经典80句 \">人生经典80句 </a></li>\n" +
                "                \n" +
                "                  <li><a href=\"/blog/289462\" title=\"apathe和tomcat集成的总结\">apathe和tomcat集成的总结</a></li>\n" +
                "                \n" +
                "                  <li><a href=\"/blog/289480\" title=\"eclipse快捷键\">eclipse快捷键</a></li>\n" +
                "                \n" +
                "              </ul>\n" +
                "            </div>\n" +
                "            <div id=\"rss\">\n" +
                "              <ul>\n" +
                "                <li><a href=\"/rss\" target=\"_blank\"><img alt=\"Rss\" src=\"http://www.javaeye.com/images/rss.png?1195448807\" /></a></li>\n" +
                "                <li><a href=\"http://fusion.google.com/add?feedurl=http://lbs.javaeye.com/rss\" target=\"_blank\"><img alt=\"Rss_google\" src=\"http://www.javaeye.com/images/rss_google.gif?1197872161\" /></a></li>\n" +
                "                <li><a href=\"http://www.zhuaxia.com/add_channel.php?url=http://lbs.javaeye.com/rss\" target=\"_blank\"><img alt=\"Rss_zhuaxia\" src=\"http://www.javaeye.com/images/rss_zhuaxia.gif?1197872161\" /></a></li>\n" +
                "                <li><a href=\"http://www.xianguo.com/subscribe.php?url=http://lbs.javaeye.com/rss\" target=\"_blank\"><img alt=\"Rss_xianguo\" src=\"http://www.javaeye.com/images/rss_xianguo.jpg?1206446465\" /></a></li>\n" +
                "                <li><a href=\"http://www.google.com/search?hl=zh-CN&q=RSS\">[什么是RSS?]</a></li>\n" +
                "              </ul>\n" +
                "            </div>\n" +
                "            <div class=\"local_bottom\"></div>\n" +
                "          \n" +
                "        </div>\n" +
                "      </div>\n" +
                "\n" +
                "      <div id=\"footer\" class=\"clearfix\">\n" +
                "        <div id=\"copyright\">\n" +
                "          <hr/>\n" +
                "          声明：JavaEye文章版权属于作者，受法律保护。没有作者书面许可不得转载。若作者同意转载，必须以超链接形式标明文章原始出处和作者。<br />\n" +
                "          &copy; 2003-2009 JavaEye.com.   All rights reserved. 上海炯耐计算机软件有限公司 [ 沪ICP备05023328号 ]\n" +
                "        </div>\n" +
                "      </div>\n" +
                "    </div>\n" +
                "    \n" +
                "  <div id=\"chat_input\" style=\"display:none;\">\n" +
                "    <iframe id=\"chat_iframe\" frameborder=\"0\" scrolling=\"no\" marginwidth=\"0\" marginheight=\"0\" allowTransparency=\"true\"></iframe>\n" +
                "  </div>\n" +
                "  <div id=\"chat_button\"><a href=\"#\" style=\"text-decoration:none;color:black\" onclick=\"$('chat_input').show();$('chat_iframe').src='/chat/widget';return false;\">说点啥吧 <img alt=\"Chat\" src=\"http://www.javaeye.com/images/app/chat.gif?1236064059\" /></a></div>\n" +
                "\n" +
                "<script type=\"text/javascript\">\n" +
                "  document.write(\"<img src='http://stat.javaeye.com/?url=\"+ encodeURIComponent(document.location.href) + \"&referrer=\" + encodeURIComponent(document.referrer) + \"&logged_in=yes\" + \"' width='0' height='0' />\");\n" +
                "</script>\n" +
                "    \n" +
                "  </body>\n" +
                "</html>";
        System.out.println(HtmToTxt.html2Text(h));
    }
}
